# Conformance workflow: creates two Kind clusters, installs Cilium in each of
# them, sets up a cluster mesh between the two and runs the Cilium connectivity
# test for every configuration listed in the job matrix.
name: Conformance Cluster Mesh (ci-clustermesh)

# Keep the concurrency group in sync with any change made to these triggers.
on:
  workflow_dispatch:
    inputs:
      PR-number:
        description: 'Pull request number.'
        required: true
      context-ref:
        description: >-
          Context in which the workflow runs. If PR is from a fork, will be
          the PR target branch (general case). If PR is NOT from a fork, will
          be the PR branch itself (this allows committers to test changes to
          workflows directly from PRs).
        required: true
      SHA:
        description: 'SHA under test (head of the PR branch).'
        required: true
      extra-args:
        description: >-
          [JSON object] Arbitrary arguments passed from the trigger comment
          via regex capture group. Parse with
          'fromJson(inputs.extra-args).argName' in workflow.
        required: false
        default: '{}'

  push:
    branches:
      - main
      - ft/main/**
    paths-ignore:
      - 'Documentation/**'

# Granting any scope explicitly sets every scope that is not listed to 'none',
# so only what this workflow needs is enumerated here.
permissions:
  contents: read  # access the repository with actions/checkout
  pull-requests: read  # retrieve information from the PR API
  statuses: write  # set the commit status

concurrency:
  # Structure:
  # - Workflow name
  # - Event type
  # - A unique identifier depending on event type:
  #   - push: SHA
  #   - workflow_dispatch: PR number
  #
  # This structure ensures a unique concurrency group name is generated for each
  # type of testing, such that re-runs will cancel the previous run.
  #
  # The identifier is selected with `&&` / `||`: the operand paired with the
  # matching event name is the one that ends up in the group name.
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{
      (github.event_name == 'push' && github.sha) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
    }}
  # A new run in the same group cancels the run that is still in progress.
  cancel-in-progress: true

# Workflow-level environment, available to every job below.
env:
  # renovate: datasource=github-releases depName=kubernetes-sigs/kind
  kind_version: v0.22.0
  # renovate: datasource=docker depName=kindest/node
  k8s_version: v1.29.1
  # Empty here; forwarded as the `ci-version` input of the Cilium CLI install step.
  cilium_cli_ci_version:
  CILIUM_CLI_MODE: helm
  # Cluster names carry the run ID; the context names are the cluster names
  # with a `kind-` prefix (presumably the context naming used by Kind —
  # TODO confirm).
  clusterName1: cluster1-${{ github.run_id }}
  clusterName2: cluster2-${{ github.run_id }}
  contextName1: kind-cluster1-${{ github.run_id }}
  contextName2: kind-cluster2-${{ github.run_id }}
  # URL of this workflow run.
  check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}

jobs:
  commit-status-start:
    name: Commit Status Start
    # Skipped on push events: the commit status is only set for the other
    # trigger (workflow_dispatch).
    if: github.event_name != 'push'
    runs-on: ubuntu-latest
    steps:
      - name: Set initial commit status
        uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
        with:
          sha: ${{ inputs.SHA || github.sha }}

  installation-and-connectivity:
    name: Installation and Connectivity Test
    timeout-minutes: 60
    # The runner label is read from the GH_RUNNER_EXTRA_POWER configuration variable.
    runs-on: ${{ vars.GH_RUNNER_EXTRA_POWER }}
    env:
      # Prefix of the JUnit report file name written by the connectivity test step.
      job_name: 'Installation and Connectivity Test'

    # Matrix dimensions, as consumed by the steps of this job:
    # - tunnel: 'disabled' selects native routing; any other value is passed
    #   as the tunnelProtocol helm value.
    # - ipfamily: ipv4 | ipv6 | dual; selects the enabled IP families and the
    #   Kind pod/service CIDRs.
    # - encryption: 'disabled', or the value passed as encryption.type.
    # - kube-proxy: passed to the Kind config as KUBEPROXYMODE; 'none' also
    #   enables kubeProxyReplacement, BPF masquerading and the ingress controller.
    # - mode: clustermesh | kvstoremesh | external. 'external' starts external
    #   kvstores and sets clustermesh.useAPIServer to false.
    # - cm-auth-mode-1 / cm-auth-mode-2: clustermesh.apiserver.tls.authMode of
    #   cluster1 / cluster2.
    # - maxConnectedClusters: clustermesh.maxConnectedClusters; also used as the
    #   cluster ID of cluster2.
    strategy:
      # Let the remaining matrix entries run to completion when one of them fails.
      fail-fast: false
      matrix:
        include:
          - name: '1'
            tunnel: 'disabled'
            ipfamily: 'ipv4'
            encryption: 'disabled'
            kube-proxy: 'iptables'
            mode: 'kvstoremesh'
            cm-auth-mode-1: 'legacy'
            cm-auth-mode-2: 'legacy'
            maxConnectedClusters: '255'

          - name: '2'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'none'
            mode: 'clustermesh'
            cm-auth-mode-1: 'migration'
            cm-auth-mode-2: 'migration'
            maxConnectedClusters: '511'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '3'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'
            mode: 'kvstoremesh'
            cm-auth-mode-1: 'cluster'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '255'

          # IPsec encryption is currently not supported in case of ipv6-only clusters (#23553)
          # Wireguard encryption is currently affected by a bug in case of ipv6-only clusters (#23917)
          - name: '4'
            tunnel: 'disabled'
            ipfamily: 'ipv6'
            encryption: 'disabled'
            kube-proxy: 'none'
            mode: 'clustermesh'
            cm-auth-mode-1: 'legacy'
            cm-auth-mode-2: 'migration'
            maxConnectedClusters: '255'

          # IPsec encryption cannot be used with BPF NodePort.
          # cm-auth-mode-* are not set for this 'external' entry, so the
          # authMode helm value is passed empty; clustermesh.useAPIServer is
          # false in this mode.
          - name: '5'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'
            mode: 'external'
            maxConnectedClusters: '255'

          - name: '6'
            tunnel: 'vxlan'
            ipfamily: 'ipv4'
            encryption: 'disabled'
            kube-proxy: 'none'
            mode: 'clustermesh'
            cm-auth-mode-1: 'cluster'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '255'

          - name: '7'
            tunnel: 'geneve'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'iptables'
            mode: 'kvstoremesh'
            cm-auth-mode-1: 'migration'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '511'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '8'
            tunnel: 'vxlan'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'
            mode: 'clustermesh'
            cm-auth-mode-1: 'cluster'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '255'

        # Tunneling is currently not supported in case of ipv6-only clusters (#17240)
        # NOTE(review): this disabled entry has no maxConnectedClusters, which
        # every active entry sets and which is used as the cluster ID of
        # cluster2; add it when re-enabling the entry.
        #  - name: '9'
        #    tunnel: 'vxlan'
        #    ipfamily: 'ipv6'
        #    encryption: 'disabled'
        #    kube-proxy: 'none'
        #    mode: 'kvstoremesh'
        #    cm-auth-mode-1: 'cluster'
        #    cm-auth-mode-2: 'cluster'

          # cm-auth-mode-* are not set for this 'external' entry either.
          - name: '10'
            tunnel: 'vxlan'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'iptables'
            mode: 'external'
            maxConnectedClusters: '511'

    steps:
      # Checks out the trusted ref: the `context-ref` input when provided,
      # otherwise the commit that triggered the run. Credentials are not
      # persisted in the checked-out repository.
      - name: Checkout context ref (trusted)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      # Local action from the trusted checkout. Later steps read
      # env.QUAY_ORGANIZATION_DEV, env.CILIUM_CLI_RELEASE_REPO and
      # env.CILIUM_CLI_VERSION, none of which is defined in this file —
      # presumably they are exported here (TODO confirm).
      - name: Set Environment Variables
        uses: ./.github/actions/set-env-variables

      # Provides the `cilium_install_defaults` and `sha` outputs consumed by
      # the "vars" step.
      # NOTE(review): chart-dir points into ./untrusted, which is only checked
      # out by the later "Checkout pull request branch" step — confirm this
      # action does not need the directory to exist yet.
      - name: Get Cilium's default values
        id: default_vars
        uses: ./.github/actions/helm-default
        with:
          image-tag: ${{ inputs.SHA }}
          chart-dir: ./untrusted/install/kubernetes/cilium

      # Derives the install and connectivity test flags for the current matrix
      # entry and exposes them, together with the Kind CIDRs and the image SHA,
      # as step outputs (cilium_install_defaults, connectivity_test_defaults,
      # sha, kind_{pod,svc}_cidr_{1,2}).
      - name: Set up job variables for GHA environment
        id: vars
        run: |
          # Flags shared by both clusters, appended to the helm-default ones.
          CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
            --helm-set=kubeProxyReplacement=${{ matrix.kube-proxy == 'none' }} \
            --helm-set=bpf.masquerade=${{ matrix.kube-proxy == 'none' }} \
            --helm-set=hubble.enabled=true \
            --helm-set=hubble.relay.enabled=true \
            --helm-set=clustermesh.useAPIServer=${{ matrix.mode != 'external' }} \
            --helm-set=clustermesh.apiserver.kvstoremesh.enabled=${{ matrix.mode == 'kvstoremesh' }} \
            --helm-set=clustermesh.maxConnectedClusters=${{ matrix.maxConnectedClusters }} \
            "

          # Tunnel protocol, or native routing when tunneling is disabled.
          CILIUM_INSTALL_TUNNEL="--helm-set=tunnelProtocol=${{ matrix.tunnel }}"
          if [ "${{ matrix.tunnel }}" == "disabled" ]; then
            CILIUM_INSTALL_TUNNEL="--helm-set-string=routingMode=native \
              --helm-set=autoDirectNodeRoutes=true \
              --helm-set=ipv4NativeRoutingCIDR=10.240.0.0/12 \
              --helm-set=ipv6NativeRoutingCIDR=fd00:10:240::/44"
          fi

          # Enabled IP families and the matching Kind pod/service CIDRs. The
          # matrix key is spelled exactly as it is declared (`ipfamily`).
          case "${{ matrix.ipfamily }}" in
            ipv4)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=false"
              KIND_POD_CIDR_1="10.242.0.0/16"
              KIND_SVC_CIDR_1="10.243.0.0/16"
              KIND_POD_CIDR_2="10.244.0.0/16"
              KIND_SVC_CIDR_2="10.245.0.0/16"
              ;;
            ipv6)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=false --helm-set=ipv6.enabled=true"
              KIND_POD_CIDR_1="fd00:10:242::/48"
              KIND_SVC_CIDR_1="fd00:10:243::/112"
              KIND_POD_CIDR_2="fd00:10:244::/48"
              KIND_SVC_CIDR_2="fd00:10:245::/112"
              ;;
            dual)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=true"
              KIND_POD_CIDR_1="10.242.0.0/16,fd00:10:242::/48"
              KIND_SVC_CIDR_1="10.243.0.0/16,fd00:10:243::/112"
              KIND_POD_CIDR_2="10.244.0.0/16,fd00:10:244::/48"
              KIND_SVC_CIDR_2="10.245.0.0/16,fd00:10:245::/112"
              ;;
            *)
              # Fail the step explicitly and report the problem on stderr.
              echo "Unknown IP family '${{ matrix.ipfamily }}'" >&2
              exit 1
              ;;
          esac

          CILIUM_INSTALL_ENCRYPTION=""
          if [ "${{ matrix.encryption }}" != "disabled" ]; then
            CILIUM_INSTALL_ENCRYPTION="--helm-set=encryption.enabled=true \
              --helm-set=encryption.type=${{ matrix.encryption }}"
          fi

          CILIUM_INSTALL_INGRESS=""
          if [ "${{ matrix.kube-proxy }}" == "none" ]; then
            # The ingress controller requires KPR to be enabled.
            CILIUM_INSTALL_INGRESS="--helm-set=ingressController.enabled=true"
          fi

          CONNECTIVITY_TEST_DEFAULTS="--hubble=false \
            --flow-validation=disabled \
            --multi-cluster=${{ env.contextName2 }} \
            --external-target=google.com \
            --include-unsafe-tests \
            --collect-sysdump-on-failure"

          # Skip external traffic (e.g. 1.1.1.1 and www.google.com) tests as IPv6 is not supported
          # in GitHub runners: https://github.com/actions/runner-images/issues/668
          if [[ "${{ matrix.ipfamily }}" == "ipv6" ]]; then
            CONNECTIVITY_TEST_DEFAULTS="$CONNECTIVITY_TEST_DEFAULTS \
              --test='!/pod-to-world' \
              --test='!/pod-to-cidr'"
          fi

          # Every value is quoted so that it is written verbatim, without word
          # splitting or pathname expansion, and the output file path is quoted
          # as well. All values are single-line: the backslash-newline pairs
          # inside the double-quoted strings above are line continuations.
          {
            echo "cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} ${CILIUM_INSTALL_TUNNEL} \
              ${CILIUM_INSTALL_IPFAMILY} ${CILIUM_INSTALL_ENCRYPTION} ${CILIUM_INSTALL_INGRESS}"
            echo "connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS}"
            echo "sha=${{ steps.default_vars.outputs.sha }}"

            echo "kind_pod_cidr_1=${KIND_POD_CIDR_1}"
            echo "kind_svc_cidr_1=${KIND_SVC_CIDR_1}"
            echo "kind_pod_cidr_2=${KIND_POD_CIDR_2}"
            echo "kind_svc_cidr_2=${KIND_SVC_CIDR_2}"
          } >> "$GITHUB_OUTPUT"

      # Installs the Cilium CLI. The release repository and version come from
      # the job environment; `ci-version` is the workflow-level
      # cilium_cli_ci_version value, which is empty in this file.
      - name: Install Cilium CLI
        uses: cilium/cilium-cli@7306e3cdc6caee738157f08e3e1ba26179f104e5 # v0.15.23
        with:
          repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
          release-version: ${{ env.CILIUM_CLI_VERSION }}
          ci-version: ${{ env.cilium_cli_ci_version }}

      # Renders one Kind configuration file per cluster from the shared
      # template; only the pod and service CIDRs differ between the two.
      # Values are quoted so that an empty or unusual expansion cannot alter
      # the command line, and the matrix key is spelled as declared (`ipfamily`).
      - name: Generate Kind configuration files
        run: |
          K8S_VERSION="${{ env.k8s_version }}" \
            PODCIDR="${{ steps.vars.outputs.kind_pod_cidr_1 }}" \
            SVCCIDR="${{ steps.vars.outputs.kind_svc_cidr_1 }}" \
            IPFAMILY="${{ matrix.ipfamily }}" \
            KUBEPROXYMODE="${{ matrix.kube-proxy }}" \
            envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster1.yaml

          K8S_VERSION="${{ env.k8s_version }}" \
            PODCIDR="${{ steps.vars.outputs.kind_pod_cidr_2 }}" \
            SVCCIDR="${{ steps.vars.outputs.kind_svc_cidr_2 }}" \
            IPFAMILY="${{ matrix.ipfamily }}" \
            KUBEPROXYMODE="${{ matrix.kube-proxy }}" \
            envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster2.yaml

      # Creates the first Kind cluster from the configuration rendered above.
      - name: Create Kind cluster 1
        uses: helm/kind-action@99576bfa6ddf9a8e612d83b513da5a75875caced # v1.9.0
        with:
          cluster_name: ${{ env.clusterName1 }}
          version: ${{ env.kind_version }}
          kubectl_version: ${{ env.k8s_version }}
          config: ./.github/kind-config-cluster1.yaml
          wait: 0 # The control-plane never becomes ready, since no CNI is present

      # Creates the second Kind cluster; identical to the first one except for
      # the cluster name and the configuration file.
      - name: Create Kind cluster 2
        uses: helm/kind-action@99576bfa6ddf9a8e612d83b513da5a75875caced # v1.9.0
        with:
          cluster_name: ${{ env.clusterName2 }}
          version: ${{ env.kind_version }}
          kubectl_version: ${{ env.k8s_version }}
          config: ./.github/kind-config-cluster2.yaml
          wait: 0 # The control-plane never becomes ready, since no CNI is present

      # The labelled node is the one later excluded from the Cilium
      # installation of cluster1 (--nodes-without-cilium).
      - name: Label one of the nodes as external to the cluster
        run: |
          EXTERNAL_NODE="${{ env.clusterName1 }}-worker2"
          kubectl --context ${{ env.contextName1 }} label node "$EXTERNAL_NODE" cilium.io/no-schedule=true

      # CoreDNS is pointed at IPv4-only upstream DNS servers in both clusters,
      # dual-stack ones included: the IPv6 upstreams are not reachable and
      # cause spurious failures. This is also required to work around #23283.
      - name: Configure the coredns nameservers
        run: |
          DNS_OVERRIDE="
          spec:
            template:
              spec:
                dnsPolicy: None
                dnsConfig:
                  nameservers:
                  - 8.8.4.4
                  - 8.8.8.8
          "

          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            kubectl --context "$CONTEXT" patch deployment -n kube-system coredns --patch="$DNS_OVERRIDE"
          done

      # Only for the 'external' mode: starts two kvstore clusters through a
      # local action. Its outputs (cilium_etcd_secrets_path,
      # cilium_install_kvstore, cilium_install_clustermesh) are used by the
      # following steps.
      - name: Start kvstore clusters
        id: kvstore
        if: matrix.mode == 'external'
        uses: ./.github/actions/kvstore
        with:
          clusters: 2

      # The same secrets manifest produced by the kvstore step is created in
      # the kube-system namespace of both clusters.
      - name: Create the secret containing the kvstore credentials
        if: ${{ matrix.mode == 'external' }}
        run: |
          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            kubectl --context "$CONTEXT" create -n kube-system -f ${{ steps.kvstore.outputs.cilium_etcd_secrets_path }}
          done

      # Only for the 'external' mode: exposes, as the
      # `cilium_install_clustermesh` output, the flags that enable the
      # clustermesh configuration, name the two clusters and append the
      # clustermesh flags produced by the kvstore step. The backslash-newline
      # pairs are line continuations, so the output is written as one line.
      - name: Set clustermesh connection parameters
        if: matrix.mode == 'external'
        id: clustermesh-vars
        run: |
          echo "cilium_install_clustermesh= \
            --set=clustermesh.config.enabled=true \
            --set clustermesh.config.clusters[0].name=${{ env.clusterName1 }} \
            --set clustermesh.config.clusters[1].name=${{ env.clusterName2 }} \
            ${{ steps.kvstore.outputs.cilium_install_clustermesh }} \
          " >> $GITHUB_OUTPUT

      # Polls the registry every 45 seconds until the manifest of each CI
      # image tagged with the SHA under test can be inspected; the step
      # timeout bounds the wait.
      - name: Wait for images to be available
        shell: bash
        timeout-minutes: 30
        run: |
          REGISTRY="quay.io/${{ env.QUAY_ORGANIZATION_DEV }}"
          TAG="${{ steps.vars.outputs.sha }}"
          for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci; do
            while ! docker manifest inspect "$REGISTRY/$image:$TAG" &> /dev/null; do
              sleep 45s
            done
          done

      # Warning: since this is a privileged workflow, subsequent workflow job
      # steps must take care not to execute untrusted code.
      #
      # The SHA under test is checked out into ./untrusted, restricted by a
      # sparse checkout to the Helm chart directory, and without persisting
      # credentials.
      - name: Checkout pull request branch (NOT TRUSTED)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ steps.vars.outputs.sha }}
          persist-credentials: false
          path: untrusted
          sparse-checkout: |
            install/kubernetes/cilium

      # A single random key is generated and stored as the cilium-ipsec-keys
      # secret of both clusters, so that they share the same key.
      - name: Create the IPSec secret in both clusters
        if: ${{ matrix.encryption == 'ipsec' }}
        run: |
          IPSEC_KEY="3 rfc4106(gcm(aes)) $(openssl rand -hex 20) 128"
          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            kubectl --context "$CONTEXT" create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${IPSEC_KEY}"
          done

      # Installs Cilium in cluster1 with cluster ID 1 and the authentication
      # mode selected by cm-auth-mode-1, skipping the node labelled earlier
      # (worker2). The kvstore and clustermesh-vars steps only run in
      # 'external' mode, so their outputs are expected to expand to nothing
      # otherwise.
      # KVSTORE_ID is presumably referenced by the flags coming from the
      # kvstore step — TODO confirm.
      - name: Install Cilium in cluster1
        id: install-cilium-cluster1
        env:
          KVSTORE_ID: 1
        run: |
          # Explicitly configure the NodePort to make sure that it is different in
          # each cluster, to workaround #24692
          cilium --context ${{ env.contextName1 }} install \
            ${{ steps.vars.outputs.cilium_install_defaults }} \
            --helm-set cluster.name=${{ env.clusterName1 }} \
            --helm-set cluster.id=1 \
            --helm-set clustermesh.apiserver.service.nodePort=32379 \
            --helm-set clustermesh.apiserver.tls.authMode=${{ matrix.cm-auth-mode-1 }} \
            ${{ steps.kvstore.outputs.cilium_install_kvstore }} \
            ${{ steps.clustermesh-vars.outputs.cilium_install_clustermesh }} \
            --nodes-without-cilium=${{ env.clusterName1 }}-worker2

      # The cilium-ca secret is read from cluster1 and created as-is in
      # cluster2, before Cilium gets installed there.
      - name: Copy the Cilium CA secret to cluster2, as they must match
        # Selecting bash explicitly makes the runner start it with
        # `-eo pipefail`: a failure while reading the secret from cluster1 then
        # fails the step directly, instead of being masked by the exit status
        # of the consuming `kubectl create`.
        shell: bash
        run: |
          kubectl --context ${{ env.contextName1 }} get secret -n kube-system cilium-ca -o yaml |
            kubectl --context ${{ env.contextName2 }} create -f -

      # Installs Cilium in cluster2. Differences from cluster1: the cluster ID
      # is the matrix maxConnectedClusters value, the NodePort is 32380, the
      # authentication mode comes from cm-auth-mode-2 and no node is excluded.
      # KVSTORE_ID is presumably referenced by the flags coming from the
      # kvstore step — TODO confirm.
      - name: Install Cilium in cluster2
        env:
          KVSTORE_ID: 2
        run: |
          # Explicitly configure the NodePort to make sure that it is different in
          # each cluster, to workaround #24692
          cilium --context ${{ env.contextName2 }} install \
            ${{ steps.vars.outputs.cilium_install_defaults }} \
            --helm-set cluster.name=${{ env.clusterName2 }} \
            --helm-set cluster.id=${{ matrix.maxConnectedClusters }} \
            --helm-set clustermesh.apiserver.service.nodePort=32380 \
            --helm-set clustermesh.apiserver.tls.authMode=${{ matrix.cm-auth-mode-2 }} \
            ${{ steps.kvstore.outputs.cilium_install_kvstore }} \
            ${{ steps.clustermesh-vars.outputs.cilium_install_clustermesh }}

      # First wait after the installation: Cilium status in both clusters,
      # then cluster mesh status in both clusters.
      - name: Wait for cluster mesh status to be ready
        run: |
          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            cilium --context "$CONTEXT" status --wait
          done
          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            cilium --context "$CONTEXT" clustermesh status --wait
          done

      # Skipped in 'external' mode, where the clustermesh configuration is
      # passed at install time instead.
      - name: Connect clusters
        if: ${{ matrix.mode != 'external' }}
        run: |
          cilium --context ${{ env.contextName1 }} clustermesh connect \
            --destination-context ${{ env.contextName2 }}

      # Second wait, after the clusters have been connected: Cilium status in
      # both clusters, then cluster mesh status in both clusters.
      - name: Wait for cluster mesh status to be ready
        if: ${{ matrix.mode != 'external' }}
        run: |
          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            cilium --context "$CONTEXT" status --wait
          done
          for CONTEXT in ${{ env.contextName1 }} ${{ env.contextName2 }}; do
            cilium --context "$CONTEXT" clustermesh status --wait
          done

      # Starts the Hubble port-forward for cluster1 in the background, waits
      # 10 seconds and then requires exactly two matching processes to exist
      # (presumably the cilium CLI and the kubectl port-forward it spawns —
      # TODO confirm); the step fails otherwise.
      - name: Port forward Relay
        run: |
          cilium --context ${{ env.contextName1 }} hubble port-forward &
          sleep 10s
          [[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]

      # Target directory of the JUnit report written by the connectivity test.
      - name: Make JUnit report directory
        run: mkdir -p cilium-junits

      # Runs the connectivity test from cluster1 with the flags computed by
      # the "vars" step (which include --multi-cluster for cluster2). The
      # joined matrix values are part of the step name, of the JUnit file name
      # and of the github_job_step JUnit property.
      - name: Run connectivity test (${{ join(matrix.*, ', ') }})
        run: |
          cilium --context ${{ env.contextName1 }} connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
          --junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
          --junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"

      # Runs when the job did not succeed, unless the cluster1 installation
      # step was skipped. Prints the Cilium and cluster mesh status of both
      # clusters, lists the pods and collects one sysdump per cluster; in
      # 'external' mode it also prints the logs of the kvstore1 and kvstore2
      # containers.
      - name: Post-test information gathering
        if: ${{ !success() && steps.install-cilium-cluster1.outcome != 'skipped' }}
        run: |
          cilium --context ${{ env.contextName1 }} status
          cilium --context ${{ env.contextName1 }} clustermesh status
          cilium --context ${{ env.contextName2 }} status
          cilium --context ${{ env.contextName2 }} clustermesh status

          kubectl config use-context ${{ env.contextName1 }}
          kubectl get pods --all-namespaces -o wide
          cilium sysdump --output-filename cilium-sysdump-context1-final-${{ join(matrix.*, '-') }}

          kubectl config use-context ${{ env.contextName2 }}
          kubectl get pods --all-namespaces -o wide
          cilium sysdump --output-filename cilium-sysdump-context2-final-${{ join(matrix.*, '-') }}

          if [ "${{ matrix.mode }}" == "external" ]; then
            for i in {1..2}; do
              echo
              echo "# Retrieving logs from kvstore$i docker container"
              docker logs kvstore$i
            done
          fi
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      # Sysdumps are uploaded only when the job did not succeed. The artifact
      # name carries the matrix entry name, matching the `cilium-sysdumps-*`
      # pattern merged by the merge-upload job.
      - name: Upload artifacts
        if: ${{ !success() }}
        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-sysdumps-${{ matrix.name }}
          path: cilium-sysdump-*.zip

      # JUnit reports are uploaded regardless of the job outcome, again one
      # artifact per matrix entry (merged by the merge-upload job).
      - name: Upload JUnits [junit]
        if: ${{ always() }}
        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-junits-${{ matrix.name }}
          path: cilium-junits/*.xml

      # Always runs, so that results are published for failed jobs too.
      - name: Publish Test Results As GitHub Summary
        if: always()
        uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
        with:
          junit-directory: 'cilium-junits'

  merge-upload:
    name: Merge and Upload Artifacts
    # Runs whatever the outcome of the test matrix is.
    if: always()
    needs: installation-and-connectivity
    runs-on: ubuntu-latest
    steps:
      # Sysdumps are merged only when the test job failed; an error in this
      # step does not fail the job.
      - name: Merge Sysdumps
        if: needs.installation-and-connectivity.result == 'failure'
        continue-on-error: true
        uses: actions/upload-artifact/merge@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-sysdumps
          pattern: cilium-sysdumps-*
          retention-days: 5
          delete-merged: true

      # The per-entry JUnit artifacts are combined into a single one.
      - name: Merge JUnits
        uses: actions/upload-artifact/merge@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-junits
          pattern: cilium-junits-*
          retention-days: 5
          delete-merged: true

  commit-status-final:
    name: Commit Status Final
    # Runs whatever the outcome of the test job is, except on push events.
    if: always() && github.event_name != 'push'
    needs: installation-and-connectivity
    runs-on: ubuntu-latest
    steps:
      # Reports the result of the test job as the status of the commit under test.
      - name: Set final commit status
        uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
        with:
          sha: ${{ inputs.SHA || github.sha }}
          status: ${{ needs.installation-and-connectivity.result }}
